Importing necessary packages
Load the CSV data and check that it loaded
# Read the raw data from data.csv (path is relative to the working directory).
df <- read.csv("data.csv")
# Preview the first rows to confirm the file was parsed as expected.
head(df)
## fips lat long county province_state combined_key date
## 1 1001 32.5395 -86.6441 Autauga Alabama Autauga, Alabama, US 2021-08-01
## 2 1003 30.7277 -87.7221 Baldwin Alabama Baldwin, Alabama, US 2021-08-01
## 3 1005 31.8683 -85.3871 Barbour Alabama Barbour, Alabama, US 2021-08-01
## 4 1007 32.9964 -87.1251 Bibb Alabama Bibb, Alabama, US 2021-08-01
## 5 1009 33.9821 -86.5679 Blount Alabama Blount, Alabama, US 2021-08-01
## 6 1011 32.1003 -85.7127 Bullock Alabama Bullock, Alabama, US 2021-08-01
## tests_combined_total
## 1 29714
## 2 118240
## 3 12471
## 4 14814
## 5 27943
## 6 6155
Clean up the data
- Remove rows whose state is an empty string
- Remove rows whose test total is NA
- Keep only the state and test-total columns; the rest are dropped
- Group by state
- Take the maximum total within each state (the maximum is treated as the
complete total for that state)
- Remove states with a total of 0 (a total of 0 means no data)
# Reduce the raw rows to one row per state holding the largest reported test
# total, discarding unusable rows along the way.
# NOTE(review): the preview of the raw data shows one row per county; max()
# keeps the single largest row per state rather than summing rows, so confirm
# that tests_combined_total is reported at state level.
df <- df %>%
  # Drop rows with a blank state name or a missing test total.
  filter(province_state != "", !is.na(tests_combined_total)) %>%
  # Only the state and the test total are needed from here on.
  select(province_state, tests_combined_total) %>%
  group_by(province_state) %>%
  # The largest total seen for a state is taken as that state's total.
  summarise(tests_combined_total = max(tests_combined_total)) %>%
  # A total of 0 is treated as "no data" and removed.
  filter(tests_combined_total != 0)
df
## # A tibble: 33 × 2
## province_state tests_combined_total
## <chr> <int>
## 1 Alabama 1787546
## 2 Alaska 1680253
## 3 Arizona 12408935
## 4 Arkansas 891151
## 5 California 73014784
## 6 Connecticut 4273086
## 7 Delaware 2099374
## 8 District of Columbia 3067883
## 9 Hawaii 2697248
## 10 Illinois 11237849
## # … with 23 more rows
Rename columns
- province_state –> state
- tests_combined_total –> total_tests
# Shorten both column names in a single call (new_name = old_name).
df <- df %>%
  rename(state = province_state, total_tests = tests_combined_total)
df
## # A tibble: 33 × 2
## state total_tests
## <chr> <int>
## 1 Alabama 1787546
## 2 Alaska 1680253
## 3 Arizona 12408935
## 4 Arkansas 891151
## 5 California 73014784
## 6 Connecticut 4273086
## 7 Delaware 2099374
## 8 District of Columbia 3067883
## 9 Hawaii 2697248
## 10 Illinois 11237849
## # … with 23 more rows
Map it
library(usmap)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Build a static US choropleth in which each state is shaded by its
# total_tests value, on a white-to-cyan gradient with the legend on the left.
# NOTE(review): the legend title says 2020, but the rows previewed from the
# CSV are dated 2021-08-01 -- confirm the intended year.
map <- plot_usmap(data = df, values = "total_tests", color = "grey") +
  scale_fill_continuous(
    low = "white",
    high = "#46e8fa",
    name = "Covid Tests by State (2020)",
    # Full argument name; the abbreviated `label` relied on partial matching.
    labels = scales::comma
  ) +
  theme(legend.position = "left")
map

Make it interactive
# Add an HTML label per state: state name, a line break, then the test total.
# NOTE(review): `map` was built before this column existed and ggplotly() is
# given only `map`, so this label does not appear to reach the tooltip --
# confirm whether it should be wired into the plot.
df[["text"]] <- paste(
  df[["state"]],
  "<br>Total Tests: ",
  df[["total_tests"]]
)
# Convert the static ggplot into an interactive plotly widget and display it.
map_plotly <- ggplotly(map)
map_plotly
Finished map